In this part, we fit the PCAWG copy number catalogs to the TCGA signatures.
library(sigminer)
# Load the TCGA signature object and the PCAWG tally, then fit the PCAWG
# catalog matrix to the TCGA signatures with bootstrapping and cache the
# result on disk.
tcga_cn_sigs_CN176_signature <- readRDS("../data/TCGA/tcga_cn_sigs_CN176_signature.rds")
pcawg_cn_tally_X <- readRDS("../data/pcawg_cn_tally_X.rds")

# The tally matrix is transposed before fitting.
# NOTE(review): presumably `nmf_matrix` is sample-by-component and the fit
# expects component-by-sample -- confirm against ?sig_fit_bootstrap_batch.
pcawg_matrix <- t(pcawg_cn_tally_X$nmf_matrix)

pcawg_fit_qp <- sig_fit_bootstrap_batch(
  pcawg_matrix,
  sig = tcga_cn_sigs_CN176_signature,
  methods = "QP",
  n = 100L,
  min_count = 1L,
  p_val_thresholds = 0.05,
  use_parallel = 12, # NOTE(review): hard-coded worker count; adjust per machine
  type = "absolute"
)
saveRDS(pcawg_fit_qp, file = "../data/pcawg_fit_qp.rds")

# Observed segment catalog profile ----
# Reload the cached fit together with the sample annotation tables.
pcawg_fit_qp <- readRDS("../data/pcawg_fit_qp.rds")
pcawg_tcga_sample <- readRDS("../data/pcawg_sample_tidy_info.rds")
# `distinct()` is namespace-qualified because only sigminer is attached in
# this script; dplyr is already used via `dplyr::` elsewhere in the file.
pcawg_tcga <- readRDS("../data/pcawg_tcga_type.rds") %>% dplyr::distinct()

# Keep the rows of the fit's `cosine` table whose sample appears in the
# sample annotation table.
choose_fit_sample <- subset(
  pcawg_fit_qp[["cosine"]],
  sample %in% pcawg_tcga_sample$sample
)

# Example sample used for the profile plots below.
s <- "SP112827"
tally_x <- readRDS("../data/pcawg_cn_tally_X.rds")
tcga_sig <- readRDS("../data/TCGA/tcga_cn_sigs_CN176_signature.rds")
pcawg_sig <- readRDS("../data/pcawg_cn_sigs_CN176_signature.rds")

# Plot the observed catalog of sample `s` (columns are samples after the
# transpose, as the `[, s]` / `samples = s` usage requires).
p <- show_catalogue(
  t(tally_x$nmf_matrix),
  samples = s,
  style = "cosmic", mode = "copynumber", method = "X",
  # by_context = TRUE, font_scale = 0.7,
  normalize = "row"
)
p

# Reconstructed catalog profile ----
# Reshape the "optimal" exposures of the fit into a signature-by-sample
# matrix (rows named by `sig`, one column per sample).
pcawg_fit_expo <- pcawg_fit_qp$expo %>%
  subset(., type == "optimal") %>%
  dplyr::select(., sample, sig, exposure) %>%
  # Name the value column explicitly instead of letting dcast() guess it.
  reshape2::dcast(., sig ~ sample, value.var = "exposure") %>%
  tibble::column_to_rownames(., var = "sig") %>%
  as.matrix()

# Align exposure rows with the signature columns by name, so the matrix
# product does not silently depend on the row order produced by dcast()
# (e.g. "Sig10" sorting before "Sig2" for character keys).
sig_ids <- colnames(tcga_sig$Signature.norm)
if (!is.null(sig_ids) && setequal(sig_ids, rownames(pcawg_fit_expo))) {
  pcawg_fit_expo <- pcawg_fit_expo[sig_ids, , drop = FALSE]
} else {
  warning(
    "Signature names do not match exposure row names; ",
    "multiplying in the existing order.",
    call. = FALSE
  )
}

# Reconstructed catalog = signature matrix %*% exposure matrix.
reconstructed_mat <- tcga_sig$Signature.norm %*% pcawg_fit_expo

p <- show_catalogue(
  reconstructed_mat,
  samples = s,
  style = "cosmic", mode = "copynumber", method = "X",
  normalize = "row"
)
p

# The similarity between the observed profile and the reconstructed profile.
cosine(reconstructed_mat[, s], t(tally_x$nmf_matrix)[, s])
#> [1] 0.9695092
# Profiles of the selected TCGA signatures.
p <- show_sig_profile(
  tcga_sig,
  sig_names = paste0("Sig", c(1, 2, 10, 13)),
  style = "cosmic", mode = "copynumber", method = "X"
)
p

# Profiles of the selected PCAWG signatures; `sig_names` is passed as a
# factor whose levels differ from the order of its values.
# NOTE(review): presumably the levels are meant to control display order --
# confirm how show_sig_profile() treats a factor here.
pcawg_sig <- readRDS("../data/pcawg_cn_sigs_CN176_signature.rds")
p <- show_sig_profile(
  pcawg_sig,
  sig_names = factor(
    paste0("CNS", c(2, 3, 7, 8, 10, 11)),
    levels = paste0("CNS", c(10, 3, 2, 8, 7, 11))
  ),
  style = "cosmic",
  mode = "copynumber",
  method = "X"
)
p